In [1]:
# Plotting-related Python library
import matplotlib.pyplot as plt
# Standard Python CSV library
import csv
# Main Python library for numerical calculations
import numpy as np
# Python libraries for manipulating dates and times as objects
import time
import datetime
import dateutil.parser  # Import the parser submodule explicitly so dateutil.parser.parse works
In [ ]:
def correlation_coefficient(xdata, ydata):
    # Calculates the Pearson correlation coefficient between the two data sets
    # 'xdata' and 'ydata' represent the two data sets being compared
    xmean = np.mean(xdata)  # Calculate the average of each data set
    ymean = np.mean(ydata)
    xsigma = np.sqrt(np.var(xdata))  # Calculate the standard deviation of each data set
    ysigma = np.sqrt(np.var(ydata))
    xysums = 0
    for i in range(len(xdata)):  # Calculate the sum of the products of the deviations
        xdiff = xdata[i] - xmean
        ydiff = ydata[i] - ymean
        xysums = xdiff * ydiff + xysums
    stnddevs = xsigma * ysigma
    coeff = xysums / stnddevs / len(xdata)
    return coeff
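For reference, this function computes the standard Pearson correlation coefficient, written with population standard deviations to match np.var above:

$$ r = \frac{1}{N}\sum_{i=1}^{N}\frac{(x_i - \bar{x})(y_i - \bar{y})}{\sigma_x\,\sigma_y} $$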
In [ ]:
#Opens csv files chosen by the user
user_file1 = input("File Name 1: ")
resultsa = csv.reader(open(user_file1), delimiter=',')
user_file2 = input("File Name 2: ")
resultsb = csv.reader(open(user_file2), delimiter=',')
In [ ]:
timesa = []
timesb = []
Val25a = []
Val25b = []
row_countera = 0
for r in resultsa:
    #Skip the first row (the header row that names the fields)
    row_countera += 1
    if row_countera > 1:
        #Append each column of interest in the CSV to a separate list
        timesa.append(dateutil.parser.parse(r[0]))
        Val25a.append(int(r[8]))
row_counterb = 0
for r in resultsb:
    row_counterb += 1
    if row_counterb > 1:
        timesb.append(dateutil.parser.parse(r[0]))
        Val25b.append(int(r[8]))
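The row-counter pattern above works; a more idiomatic sketch (under the same column assumptions: timestamp in column 0, particle count in column 8) uses a with-block so the file is closed automatically and next() to consume the header:
In [ ]:
# Alternative header handling (illustrative): re-open File 1, skip the
# header row with next(), and build the lists in one pass
with open(user_file1) as f:
    reader = csv.reader(f, delimiter=',')
    next(reader)  # consume the header row
    times_alt, vals_alt = [], []
    for row in reader:
        times_alt.append(dateutil.parser.parse(row[0]))
        vals_alt.append(int(row[8]))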
In [ ]:
#Choose the number of data points to combine and average
n_merge = int(input("n data points to combine: "))
ndata_a = len(Val25a)
ndata_b = len(Val25b)
nsum_data_a = int(ndata_a / n_merge)
nsum_data_b = int(ndata_b / n_merge)
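Integer division here means any leftover points that do not fill a complete group are dropped; for example (made-up numbers), 1003 points merged in groups of 10 give 100 averaged points, with the last 3 raw points ignored:
In [ ]:
# Illustration with made-up numbers: how many full groups fit,
# and how many trailing points are dropped
ngroups, leftover = divmod(1003, 10)
print(ngroups, leftover)  # -> 100 3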
In [ ]:
data_ave_a = []
data_ave_b = []
data_unc_a = []
data_unc_b = []
merge_times_a = []
merge_times_b = []
for i in range(nsum_data_a):
    #Calculate the mean and standard deviation of each group of n_merge points
    idata = Val25a[i*n_merge:(i+1)*n_merge]
    idata_array = np.asarray(idata)  #Convert 'idata' to a numpy array
    aqmean = np.mean(idata_array)
    aqsigma = np.sqrt(np.var(idata_array))
    data_ave_a.append(aqmean)
    data_unc_a.append(aqsigma)
    itimes = timesa[i*n_merge:(i+1)*n_merge]
    itime = itimes[int(len(itimes)/2)]  #Use the middle timestamp of the group
    merge_times_a.append(itime)
for i in range(nsum_data_b):
    #Same calculation for the second data set
    idata = Val25b[i*n_merge:(i+1)*n_merge]
    idata_array = np.asarray(idata)
    aqmean = np.mean(idata_array)
    aqsigma = np.sqrt(np.var(idata_array))
    data_ave_b.append(aqmean)
    data_unc_b.append(aqsigma)
    itimes = timesb[i*n_merge:(i+1)*n_merge]
    itime = itimes[int(len(itimes)/2)]
    merge_times_b.append(itime)
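The two loops can also be written as a vectorized sketch with NumPy's reshape, under the same assumption that leftover points are dropped:
In [ ]:
# Vectorized equivalent for File 1 (illustrative): truncate to a whole
# number of groups, reshape to (groups, n_merge), and reduce along rows
vals_a = np.asarray(Val25a[:nsum_data_a * n_merge], dtype=float)
blocks_a = vals_a.reshape(nsum_data_a, n_merge)
data_ave_a_vec = blocks_a.mean(axis=1)  # matches data_ave_a
data_unc_a_vec = blocks_a.std(axis=1)   # matches data_unc_a (population std)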
In [ ]:
fig = plt.figure()
#Plot the first graph: the two data sets as separate series
plt.plot(merge_times_a, data_ave_a, "b.", label='File 1')
plt.plot(merge_times_b, data_ave_b, "g.", label='File 2')
plt.legend(loc="best")
plt.xlabel("Time")
plt.ylabel("Particle Concentration 2.5")
file_title = "Air Quality Test Results"
plt.title(file_title)
fig.autofmt_xdate()
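The standard deviations stored in data_unc_a and data_unc_b above are never drawn; a sketch with plt.errorbar overlays them as vertical error bars:
In [ ]:
# Optional: same time-series plot with the stored standard deviations
# shown as error bars (figure 3 avoids colliding with figure 2 below)
fig_err = plt.figure(3)
plt.errorbar(merge_times_a, data_ave_a, yerr=data_unc_a, fmt="b.", label='File 1')
plt.errorbar(merge_times_b, data_ave_b, yerr=data_unc_b, fmt="g.", label='File 2')
plt.legend(loc="best")
plt.xlabel("Time")
plt.ylabel("Particle Concentration 2.5")
fig_err.autofmt_xdate()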
In [ ]:
#Calculate the correlation coefficient of the data from the two sensors
data_arrayA = np.asarray(data_ave_a)
data_arrayB = np.asarray(data_ave_b)
#If one sensor logged a few extra data points, trim both arrays to the common length
npoints = min(len(data_arrayA), len(data_arrayB))
data_arrayA = data_arrayA[:npoints]
data_arrayB = data_arrayB[:npoints]
corr_coeff = correlation_coefficient(data_arrayA, data_arrayB)
corr_statement = 'Correlation coefficient = ' + str(corr_coeff)
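As a quick cross-check, NumPy's built-in np.corrcoef should agree closely with the hand-rolled function on the trimmed arrays:
In [ ]:
# Cross-check: np.corrcoef returns a 2x2 matrix; the off-diagonal
# entry is the Pearson correlation coefficient
print(np.corrcoef(data_arrayA, data_arrayB)[0, 1])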
In [ ]:
#Graph the correlation between the sensors
plt.figure(2)
plt.plot(data_arrayA, data_arrayB, "b.")
plt.xlabel("Sensor 1")
plt.ylabel("Sensor 2")
file_title2 = "AQ Sensor Correlation"
plt.title(file_title2)
#Print correlation coefficient
print(corr_statement)
plt.show()